#coding:utf-8
import scrapy
import json

class QuotesSpider(scrapy.Spider):
    """Spider that reports how many NVD search results match a query.

    The query string is read from the ``tag`` attribute of the spider
    (typically supplied as a spider argument, e.g. ``-a tag=openssl``).
    When ``tag`` is not set, no request is issued.
    """

    name = "nvd"

    def start_requests(self):
        """Yield a single NVD basic-search request for ``self.tag``, if set."""
        url = 'https://nvd.nist.gov'
        tag = getattr(self, 'tag', None)

        if tag is not None:
            url = url + '/vuln/search/results?adv_search=false&form_type=basic&results_type=overview&search_type=all&query=' + tag
            yield scrapy.Request(url, callback=self.parse)

    def parse(self, response):
        """Extract the result count from an NVD search results page.

        Passes ``['nvd_num', <count>, <response url>]`` to
        ``csvout.list2csv`` and yields ``{'nvd_num': <count>}``.  If the
        counter element is missing or contains no digits, a warning is
        logged and nothing is written or yielded.
        """
        # XPath copied from Chrome's developer tools.
        res = response.xpath('//*[@id="p_lt_WebPartZone1_zoneCenter_pageplaceholder_p_lt_WebPartZone1_zoneCenter_VulnerabilitySearchResults_VulnResultsPanel"]/div[1]/div[2]/strong/text()').extract_first()
        if res is None:
            # extract_first() returns None when the XPath matches nothing
            # (e.g. the page layout changed); avoid crashing on it.
            self.logger.warning('NVD result counter not found on %s', response.url)
            return

        # Keep only the digits of the counter text.  A join is used instead
        # of int(filter(...)): on Python 3 filter() returns an iterator,
        # which int() rejects with a TypeError.
        digits = ''.join(ch for ch in res if ch.isdigit())
        if not digits:
            self.logger.warning('NVD result counter %r has no digits on %s', res, response.url)
            return

        num = int(digits)
        outli = ['nvd_num', num, response.url]
        # NOTE(review): csvout is not imported in the visible part of this
        # file; it must be made available for this call to work -- confirm.
        csvout.list2csv(outli)
        yield {
            'nvd_num': num
        }

